# Show code
# Cargar librerías
library(dplyr)
library(lubridate)
library(ggplot2)
library(readr)
library(plotly)
library(zoo)
# This code was created using RStudio version 2024.12.1 by generating a Quarto document.
# Cargar librerías
library(dplyr)
library(lubridate)
library(ggplot2)
library(readr)
library(plotly)
library(zoo)# Load the data file
# Read the raw sensor export: semicolon-separated, decimal comma, with header
datos <- read.table(
  "C:\\Users\\marin\\Documents\\Trabajo\\Dayos Guillaume\\Rstudio quarto\\Datos sensores3.csv",
  sep = ";",
  dec = ",",
  header = TRUE
)

# Convert a column to numeric.
# A column that read.table() could not parse as a number stays as text and
# still carries decimal commas, so the comma has to be swapped for a dot
# BEFORE as.numeric(); doing it afterwards is too late because the values
# have already become NA.
to_numeric <- function(x) {
  if (is.numeric(x)) {
    return(as.numeric(x))
  }
  as.numeric(gsub(",", ".", as.character(x), fixed = TRUE))
}

datos$Vitesse <- to_numeric(datos$Vitesse)
datos$VEGA_E <- to_numeric(datos$VEGA_E)

# Keep an untouched copy of the velocity so the effect of the filters applied
# to Vitesse can be compared against it later
datos$Vitesse_original <- datos$Vitesse

# If there are more than three consecutive repeated values, they will be
# replaced with NaN.
# Run-length encode the velocity to find runs of identical consecutive values
rle_vitesse <- rle(datos$Vitesse)
# Expand the per-run verdict (run longer than 3?) back to one flag per row
to_replace <- rep(rle_vitesse$lengths > 3, times = rle_vitesse$lengths)
# Blank out every reading that belongs to such a run
datos$Vitesse[to_replace] <- NaN

# A moving average is used, considering the temporal resolution, so the moving
# average window is 4 measurements (40 minutes). If the difference between the
# value and the moving average is greater than 30%, it is considered an outlier.
# Make sure the velocity column is numeric before smoothing
datos$Vitesse <- as.numeric(datos$Vitesse)
# Centered rolling mean over 4 readings; positions without a full window are
# filled with NA, and a window containing a missing value also yields a
# missing mean because mean() is called without na.rm
media_movil <- zoo::rollapply(
  datos$Vitesse,
  width = 4,
  FUN = mean,
  align = "center",
  fill = NA
)
# Deviation of each reading from its rolling mean, relative to that mean
diferencia_relativa <- abs(datos$Vitesse - media_movil) / media_movil
# A reading more than 30 % away from the rolling mean counts as an outlier
outliers <- diferencia_relativa > 0.3
# which() skips the positions where the comparison is NA, exactly as the
# logical index does for a length-one replacement
datos$Vitesse[which(outliers)] <- NaN

# The maximum surface velocity in a natural river is 7 m/s and the minimum
# recommended limit is 0.02 m/s. Any value outside this range will be replaced
# with NaN.
# Flag readings outside the accepted range [20, 7000]
# (the plots in this file label Vitesse in mm/s)
fuera_de_rango <- !(datos$Vitesse >= 20 & datos$Vitesse <= 7000)
# Missing readings compare as NA and are left untouched by which()
datos$Vitesse[which(fuera_de_rango)] <- NaN

# Data with an SNR value below 10 dBm may be imprecise, and those below 6 are
# considered unreliable.
# Make sure the SNR_avg column is numeric
datos$SNR_avg <- as.numeric(datos$SNR_avg)
# Rows whose average SNR is below 10 lose their velocity reading
snr_bajo <- which(datos$SNR_avg < 10)
datos$Vitesse[snr_bajo] <- NaN
# Write the filtered table to disk using a comma as the field delimiter
write_delim(
  datos,
  "datos_velocidad_filtrados.csv",
  delim = ","
)
# Load the files
# Read the filtered file back in, keeping every column as character
filtrados <- read_delim(
  "datos_velocidad_filtrados.csv",
  delim = ",",
  col_types = cols(.default = col_character())
)
# Row removal is currently disabled
#datos_sin_NaN <- drop_na(datos)
# Parse the day/month/year timestamps into POSIXct.
# NOTE(review): no tz is given, so the session time zone is used - confirm this
# matches the clock of the data logger.
formato_fecha <- "%d/%m/%Y %H:%M"
datos$DateTime <- as.POSIXct(datos$DateTime, format = formato_fecha)
# Create the interactive plot
# Start an empty figure. The size is set here rather than in layout() because
# plotly for R deprecates width/height in layout() and warns when they are
# passed there.
fig <- plot_ly(width = 800, height = 450)

# Unfiltered velocity: hidden by default, can be switched on from the legend
fig <- fig %>% add_markers(
  x = datos$DateTime,
  y = datos$Vitesse_original,
  name = "Original Surface velocity",
  marker = list(
    color = "gray",
    size = 3
  ),
  visible = "legendonly"
)

# Velocity after all the filters applied to datos$Vitesse
fig <- fig %>% add_markers(
  x = datos$DateTime,
  y = datos$Vitesse,
  name = "Surface velocity",
  marker = list(
    color = "blue",
    size = 3
  )
)

fig <- fig %>% layout(
  title = "Filtered surface velocity and original surface velocity",
  xaxis = list(title = "Date", type = "date"),
  yaxis = list(
    title = "Surface velocity (mm/s)",
    side = "left",
    showgrid = FALSE
  ),
  # NOTE(review): neither trace above is assigned to this secondary axis
  yaxis2 = list(
    side = "right",
    overlaying = "y",
    showgrid = TRUE,
    title_standoff = 30, # Adjust the distance between the title and the axis
    ticklen = 6, # Adjust the length of the tick marks
    tickangle = 0 # Adjust the angle
  ),
  legend = list(
    orientation = "h", # horizontal
    x = 0, # aligned to the left
    y = -0.2 # below the X axis
  ),
  hovermode = "x",
  margin = list(r = 100) # Right margin reserved for the secondary axis
)

# Display the interactive plot
fig

# Count the non-NA values in the 'Vitesse' column
cantidad_vitesse <- sum(!is.na(datos[["Vitesse"]]))
cantidad_vitesse
#> [1] 18209

# Count the non-NA values in the 'Vitesse_original' column
cantidad_vitesse_original <- sum(!is.na(datos[["Vitesse_original"]]))
cantidad_vitesse_original
#> [1] 38483

# Absolute difference between the two non-NA counts
diferencia <- abs(cantidad_vitesse - cantidad_vitesse_original)

# The same difference as a percentage of the original count
porcentaje_diferencia <- diferencia / cantidad_vitesse_original * 100

# Print the results
print(paste("Diferencia en cantidad de datos no NA:", diferencia))
#> [1] "Diferencia en cantidad de datos no NA: 20274"
print(
  paste("Porcentaje de diferencia:", round(porcentaje_diferencia, 2), "%")
)
#> [1] "Porcentaje de diferencia: 52.68 %"
# Make sure the 'Date' column is of class Date
# NOTE(review): no format is given, so as.Date() relies on its default
# parsing - confirm the column is stored in a format it recognises.
datos$Date <- as.Date(datos$Date)

# Keep only the rows between 27 April and 3 November 2024 (both inclusive);
# rows with a missing date are dropped
inicio_periodo <- as.Date("2024-04-27")
fin_periodo <- as.Date("2024-11-03")
en_periodo <- datos$Date >= inicio_periodo & datos$Date <= fin_periodo
datos_filtrados <- datos[which(en_periodo), , drop = FALSE]

# Count the non-NA values of 'Vitesse' within the period
cantidad_vitesse <- sum(!is.na(datos_filtrados[["Vitesse"]]))
cantidad_vitesse
#> [1] 18209

# Count the non-NA values of 'Vitesse_original' within the period
cantidad_vitesse_original <- sum(!is.na(datos_filtrados[["Vitesse_original"]]))
cantidad_vitesse_original
#> [1] 25741

# Absolute difference between the two non-NA counts
diferencia <- abs(cantidad_vitesse - cantidad_vitesse_original)

# The same difference as a percentage of the original count
porcentaje_diferencia <- diferencia / cantidad_vitesse_original * 100

# Print the results
print(paste("Diferencia en cantidad de datos no NA (periodo):", diferencia))
#> [1] "Diferencia en cantidad de datos no NA (periodo): 7532"
print(
  paste(
    "Porcentaje de diferencia (periodo):",
    round(porcentaje_diferencia, 2),
    "%"
  )
)
#> [1] "Porcentaje de diferencia (periodo): 29.26 %"
# Keep only the rows where timestamp, water level and velocity are all
# present, then derive an abbreviated month label from the timestamp
df <- na.omit(datos[c("DateTime", "VEGA_E", "Vitesse")])
df[["Mes"]] <- lubridate::month(df[["DateTime"]], label = TRUE, abbr = TRUE)

# Scatter of velocity against water level; the month only appears in the
# hover text here.
# NOTE(review): there is no `color` mapping in this call, so the "Paired"
# palette presumably has no visible effect - confirm.
fig <- plot_ly(
  data = df,
  x = ~VEGA_E,
  y = ~Vitesse,
  type = "scatter",
  mode = "markers",
  colors = "Paired",
  marker = list(size = 6),
  text = ~paste(
    "Date:", DateTime, "<br>Month:", Mes,
    "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Surface velocity vs Water Level",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Show plot
fig

# Create a new data frame without NA/NaN values and with a Month column
df <- na.omit(datos[c("DateTime", "VEGA_E", "Vitesse")])
df[["Mes"]] <- lubridate::month(df[["DateTime"]], label = TRUE, abbr = TRUE)

# Scatter of velocity against water level, one color per month
fig <- plot_ly(
  data = df,
  x = ~VEGA_E,
  y = ~Vitesse,
  type = "scatter",
  mode = "markers",
  color = ~Mes, # One trace color per month
  colors = "Paired", # Palette the month colors are drawn from
  marker = list(size = 6),
  text = ~paste(
    "Date:", DateTime, "<br>Month:", Mes,
    "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Surface velocity vs Water Level colored by month",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Show plot
fig

# Create a new data frame without NA/NaN values and with a Month column
df <- na.omit(datos[c("DateTime", "VEGA_E", "Vitesse")])
df[["Mes"]] <- lubridate::month(df[["DateTime"]], label = TRUE, abbr = TRUE)

# Sort chronologically so the connecting line follows the order of the
# measurements
orden_temporal <- order(df[["DateTime"]])
df <- df[orden_temporal, ]

# Velocity against water level, one color per month, points joined by a line
fig <- plot_ly(
  data = df,
  x = ~VEGA_E,
  y = ~Vitesse,
  type = "scatter",
  mode = "lines+markers", # Markers plus a line joining consecutive points
  color = ~Mes, # One trace color per month
  colors = "Paired", # Palette the month colors are drawn from
  marker = list(size = 6),
  text = ~paste(
    "Date:", DateTime, "<br>Month:", Mes,
    "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Surface velocity vs Water Level colored by month",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Show plot
fig

# Create the logical column used to detect floods
# Flood flag: TRUE where the water level exceeds 0.9, NA where it is missing
datos$Crecida <- datos$VEGA_E > 0.9

# The labelling runs on plain vectors and the results are written back once at
# the end; assigning into the data frame row by row re-modifies the whole
# column on every iteration.
n_filas <- nrow(datos)
crecida <- datos$Crecida
fechas <- datos$DateTime
evento <- rep(NA_integer_, n_filas)
etiqueta <- rep(NA_character_, n_filas)

evento_id <- 0
en_crecida <- FALSE
fecha_inicio <- NA
evento_inicial_asignado <- FALSE # TRUE once the first flood row has been seen

# seq_len() yields an empty sequence for an empty table, whereas 1:nrow()
# would iterate over c(1, 0)
for (i in seq_len(n_filas)) {
  hay_dato <- !is.na(crecida[i])
  es_crecida <- hay_dato && crecida[i]

  if (!evento_inicial_asignado) {
    # Initial block: every row up to and including the first flood row gets
    # event 0 and a fixed label
    evento[i] <- 0
    etiqueta[i] <- "2024-05-09 15:10:00"
    if (es_crecida) {
      evento_inicial_asignado <- TRUE
      fecha_inicio <- as.character(fechas[i])
    }
    next
  }

  if (es_crecida) {
    if (!en_crecida) {
      # Rising edge: a new event starts and is labelled with its start time
      evento_id <- evento_id + 1
      en_crecida <- TRUE
      fecha_inicio <- as.character(fechas[i])
    }
  } else {
    # A non-flood row or a missing level ends the current flood state
    en_crecida <- FALSE
  }

  # Rows with a known level keep the id and label of the latest event; rows
  # with a missing level stay unlabelled
  if (hay_dato && evento_id > 0) {
    evento[i] <- evento_id
    etiqueta[i] <- fecha_inicio
  }
}

datos$Evento <- evento
# Store the label as a factor so plotly treats it as a discrete grouping
datos$EtiquetaEvento <- factor(etiqueta)
# Filter the data for the visualization
# Keep the rows that have a water level, a velocity and an event label
filas_completas <- !is.na(datos$VEGA_E) &
  !is.na(datos$Vitesse) &
  !is.na(datos$EtiquetaEvento)
datos_plot <- datos[which(filas_completas), , drop = FALSE]

# Scatter of velocity against water level, one color per event label.
# NOTE(review): the title says "< 0.9 m" while the flood flag in this file is
# computed as VEGA_E > 0.9 - confirm which wording is intended.
fig <- plot_ly(
  data = datos_plot,
  x = ~VEGA_E,
  y = ~Vitesse,
  type = "scatter",
  mode = "markers",
  color = ~EtiquetaEvento,
  colors = "Paired",
  marker = list(size = 6),
  text = ~paste(
    "Date:", DateTime, "<br>Event since:", EtiquetaEvento,
    "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Surface velocity vs Water level colored by event (< 0.9 m)",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )
# Show the figure built just before this point
fig

# Same event-colored view, with consecutive points of each event joined by a
# line.
# NOTE(review): the title says "< 0.9 m" while the flood flag in this file is
# computed as VEGA_E > 0.9 - confirm which wording is intended.
fig <- plot_ly(
  data = datos_plot,
  x = ~VEGA_E,
  y = ~Vitesse,
  type = "scatter",
  mode = "lines+markers",
  color = ~EtiquetaEvento,
  colors = "Paired",
  marker = list(size = 6),
  text = ~paste(
    "Date:", DateTime, "<br>Event since:", EtiquetaEvento,
    "<br>VEGA_E:", VEGA_E, "<br>Vitesse:", Vitesse
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Surface velocity vs Water level colored by event (< 0.9 m)",
    xaxis = list(title = "Water level (m)"),
    yaxis = list(title = "Surface velocity (mm/s)"),
    hovermode = "closest"
  )

# Show plot
fig